home *** CD-ROM | disk | FTP | other *** search
/ Visual Cafe 3 / Visual Cafe 3.ISO / Vcafe / Main.bin / CharacterBreakData.java < prev    next >
Text File  |  1998-09-22  |  14KB  |  269 lines

  1. /*
  2.  * @(#)CharacterBreakData.java    1.9 98/01/12
  3.  *
  4.  * (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
  5.  * (C) Copyright IBM Corp. 1996 - All Rights Reserved
  6.  *
  7.  * Portions copyright (c) 1996 Sun Microsystems, Inc. All Rights Reserved.
  8.  *
  9.  *   The original version of this source code and documentation is copyrighted
  10.  * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
  11.  * materials are provided under terms of a License Agreement between Taligent
  12.  * and Sun. This technology is protected by multiple US and International
  13.  * patents. This notice and attribution to Taligent may not be removed.
  14.  *   Taligent is a registered trademark of Taligent, Inc.
  15.  *
  16.  * Permission to use, copy, modify, and distribute this software
  17.  * and its documentation for NON-COMMERCIAL purposes and without
  18.  * fee is hereby granted provided that this copyright notice
  19.  * appears in all copies. Please refer to the file "copyright.html"
  20.  * for further important copyright and licensing information.
  21.  *
  22.  * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
  23.  * THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
  24.  * TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  25.  * PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
  26.  * ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
  27.  * DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
  28.  *
  29.  */
  30.  
  31. package java.text;
  32.  
  33. /**
  34.  * The CharacterBreakData contains data used by SimpleTextBoundary
  35.  * to determine character breaks.
  36.  * @see #BreakIterator
  37.  */
  38. final class CharacterBreakData extends TextBoundaryData
  39. {
  40.     private static final byte accent_diacritic = 0;
  41.     private static final byte baseForm = 1;
  42.     private static final byte baseCR = 2;
  43.     private static final byte baseLF = 3;
  44.     private static final byte choseong = 4;   // Korean initial consonant
  45.     private static final byte jungseong = 5;  // Korean vowel
  46.     private static final byte jongseong = 6;  // Korean final consonant
  47.     private static final byte EOS = 7;
  48.     private static final int COL_COUNT = 8;
  49.  
  50.     private static final byte SI = (byte)0x80;
  51.     private static final byte STOP = (byte) 0;
  52.     private static final byte SI_STOP = (byte)SI + STOP;
  53.  
  54.     private static final byte kCharacterForwardData[] =
  55.     {
  56.         // acct        base             cr              lf
  57.         // cho         jung             jong            EOS
  58.         STOP,          STOP,            STOP,           STOP,
  59.         STOP,          STOP,            STOP,           STOP,
  60.  
  61.         // 1
  62.         (byte)(SI+2),  (byte)(SI+2),    (byte)(SI+3),   (byte)(SI+7),
  63.         (byte)(SI+4),  (byte)(SI+5),    (byte)(SI+6),   SI_STOP,
  64.  
  65.         // 2
  66.         (byte)(SI+2),  SI_STOP,         SI_STOP,        SI_STOP,
  67.         SI_STOP,       SI_STOP,         SI_STOP,        SI_STOP,
  68.  
  69.         // 3
  70.         SI_STOP,       SI_STOP,         SI_STOP,        (byte)(SI+7),
  71.         SI_STOP,       SI_STOP,         SI_STOP,        SI_STOP,
  72.  
  73.         // 4
  74.         (byte)(SI+2),  SI_STOP,         SI_STOP,        SI_STOP,
  75.         (byte)(SI+4),  (byte)(SI+5),    (byte)(SI+6),   SI_STOP,
  76.  
  77.         // 5
  78.         (byte)(SI+2),  SI_STOP,         SI_STOP,        SI_STOP,
  79.         SI_STOP,      (byte)(SI+5),    (byte)(SI+6),    SI_STOP,
  80.  
  81.         // 6
  82.         (byte)(SI+2),  SI_STOP,         SI_STOP,        SI_STOP,
  83.         SI_STOP,       SI_STOP,         (byte)(SI+6),   SI_STOP,
  84.  
  85.         // 7
  86.         SI_STOP,       SI_STOP,         SI_STOP,        SI_STOP,
  87.         SI_STOP,       SI_STOP,         SI_STOP,        SI_STOP
  88.     };
  89.     private static final WordBreakTable kCharacterForwardTable =
  90.     new WordBreakTable(COL_COUNT, kCharacterForwardData);
  91.     private static final byte kCharacterBackwardData[] =
  92.     {
  93.         // acct         base            cr              lf
  94.         // cho          jung            jong            EOS
  95.         STOP,           STOP,           STOP,           STOP,
  96.         STOP,           STOP,           STOP,           STOP,
  97.  
  98.         // 1
  99.         (byte)(SI+1),   SI_STOP,        SI_STOP,        (byte)(SI+1),
  100.         SI_STOP,        (byte)(SI+1),   (byte)(SI+1),   SI_STOP
  101.     };
  102.  
  103.     private static final WordBreakTable kCharacterBackwardTable =
  104.     new WordBreakTable(COL_COUNT, kCharacterBackwardData);
  105.     private static final int kRawMapping[] =
  106.     {
  107.         baseForm, //UNASSIGNED      = 0,
  108.         baseForm, //UPPERCASE_LETTER    = 1,
  109.         baseForm, //LOWERCASE_LETTER    = 2,
  110.         baseForm, //TITLECASE_LETTER    = 3,
  111.         baseForm, //MODIFIER_LETTER     = 4,
  112.         baseForm, //OTHER_LETTER        = 5,
  113.         accent_diacritic, //NON_SPACING_MARK    = 6,
  114.         accent_diacritic, //ENCLOSING_MARK      = 7,
  115.         baseForm, //COMBINING_SPACING_MARK  = 8,
  116.         baseForm, //DECIMAL_DIGIT_NUMBER    = 9,
  117.         baseForm, //LETTER_NUMBER       = 10,
  118.         baseForm, //OTHER_NUMBER        = 11,
  119.         baseForm, //SPACE_SEPARATOR     = 12,
  120.         baseForm, //LINE_SEPARATOR      = 13,
  121.         baseForm, //PARAGRAPH_SEPARATOR = 14,
  122.         baseForm, //CONTROL         = 15,
  123.         baseForm, //FORMAT      = 16,
  124.         baseForm, //????            = 17,
  125.         baseForm, //PRIVATE_USE     = 18,
  126.         baseForm, //SURROGATE        = 19,
  127.         baseForm, //DASH_PUNCTUATION    = 20,
  128.         baseForm, //START_PUNCTUATION    = 21,
  129.         baseForm, //END_PUNCTUATION     = 22,
  130.         baseForm, //CONNECTOR_PUNCTUATION   = 23,
  131.         baseForm, //OTHER_PUNCTUATION   = 24,
  132.         baseForm, //MATH_SYMBOL     = 25,
  133.         baseForm, //CURRENCY_SYMBOL     = 26,
  134.         baseForm, //MODIFIER_SYMBOL     = 27,
  135.         baseForm, //OTHER_SYMBOL        = 28;
  136.     };
  137.  
  138.     private static final SpecialMapping kExceptionChar[] = //{};
  139.     {
  140.         new SpecialMapping(ASCII_LINEFEED, baseLF),
  141.         new SpecialMapping(ASCII_CARRIAGE_RETURN, baseCR),
  142.         new SpecialMapping(HANGUL_CHOSEONG_LOW, HANGUL_CHOSEONG_HIGH, choseong),
  143.         new SpecialMapping(HANGUL_JUNGSEONG_LOW, HANGUL_JUNGSEONG_HIGH, jungseong),
  144.         new SpecialMapping(HANGUL_JONGSEONG_LOW, HANGUL_JONGSEONG_HIGH, jongseong),
  145.         new SpecialMapping(PUNCTUATION_LINE_SEPARATOR, PUNCTUATION_PARAGRAPH_SEPARATOR, baseLF),
  146.         new SpecialMapping(END_OF_STRING, EOS)
  147.     };
  148.  
  149.     private static final boolean CharacterExceptionFlags[] = {
  150.         false,          // kNonCharacter            = 0,
  151.         false,          // kUppercaseLetter         = 1,
  152.         false,          // kLowercaseLetter         = 2,
  153.         false,          // kTitlecaseLetter         = 3,
  154.         false,          // kModifierLetter          = 4,
  155.         true,           // kOtherLetter             = 5,
  156.         false,          // kNonSpacingMark          = 6,
  157.         false,          // kEnclosingMark           = 7,
  158.         false,          // kCombiningSpacingMark    = 8,
  159.         false,          // kDecimalNumber           = 9,
  160.         false,          // kLetterNumber            = 10,
  161.         false,          // kOtherNumber             = 11,
  162.         false,          // kSpaceSeparator          = 12,
  163.         true,           // kLineSeparator           = 13,
  164.         true,           // kParagraphSeparator      = 14,
  165.         true,           // kControlCharacter        = 15,
  166.         false,          // kFormatCharacter         = 16,
  167.         false,          // UNDEFINED                = 17,
  168.         false,          // kPrivateUseCharacter     = 18,
  169.         false,          // kSurrogate               = 19,
  170.         false,          // kDashPunctuation         = 20,
  171.         false,          // kOpenPunctuation         = 21,
  172.         false,          // kClosePunctuation        = 22,
  173.         false,          // kConnectorPunctuation    = 23,
  174.         false,          // kOtherPunctuation        = 24,
  175.         false,          // kMathSymbol              = 25,
  176.         false,          // kCurrencySymbol          = 26,
  177.         false,          // kModifierSymbol          = 27,
  178.         false           // kOtherSymbol             = 28
  179.     };
  180.  
  181.     private static final int kCharacterAsciiValues[] = {
  182.         //  null      soh       stx       etx       eot       enq       ask       bell
  183.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  184.         //  bs        ht        lf      vt        ff        cr      so        si
  185.             baseForm, baseForm, baseLF, baseForm, baseForm, baseCR, baseForm, baseForm,
  186.         //  dle       dc1       dc2       dc3       dc4       nak       syn       etb
  187.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  188.         //  can       em        sub       esc       fs        gs        rs        us
  189.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  190.         //  sp        !         "         #         $         %         &         '
  191.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  192.         //  (         )         *         +         ,         -         .         /
  193.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  194.         //  0         1         2         3         4         5         6         7
  195.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  196.         //  8         9         :         ;         <         =         >         ?
  197.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  198.         //  @         A         B         C         D         E         F         G
  199.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  200.         //  H         I         J         K         L         M         N         O
  201.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  202.         //  P         Q         R         S         T         U         V         W
  203.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  204.         //  X         Y         Z         [         \         ]         ^         _
  205.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  206.         //  `         a         b         c         d         e         f         g
  207.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  208.         //  h         i         j         k         l         m         n         o
  209.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  210.         //  p         q         r         s         t         u         v         w
  211.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  212.         //  x         y         z         {         |         }         ~         del
  213.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  214.         //  ctrl      ctrl      ctrl      ctrl      ctrl      ctrl      ctrl      ctrl
  215.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  216.         //  ctrl      ctrl      ctrl      ctrl      ctrl      ctrl      ctrl      ctrl
  217.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  218.         //  ctrl      ctrl      ctrl      ctrl      ctrl      ctrl      ctrl      ctrl
  219.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  220.         //  ctrl      ctrl      ctrl      ctrl      ctrl      ctrl      ctrl      ctrl
  221.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  222.         //  nbsp      í         ó         ú         ñ         Ñ         ª
  223.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  224.         //  ¿         ⌐         ¬         ½         ¼         ¡         «         »
  225.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  226.         //  ░         ▒         ▓         │         ┤         ╡         ╢         ╖
  227.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  228.         //  ╕         ╣         ║         ╗         ╝         ╜         ╛         ┐
  229.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  230.         //  └         ┴         ┬         ├         ─        ┼         ╞         ╟
  231.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  232.         //  ╚         ╔         ╩         ╦         ╠         ═         ╬         ╧
  233.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  234.         //  ╨         ╤         ╥         ╙         ╘         ╒         ╓         ╫
  235.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  236.         //  ╪         ┘         ┌         █         ▄         ▌         ▐         ▀
  237.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  238.         //  α         ß         Γ         π         Σ         σ         µ         τ
  239.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  240.         //  Φ         Θ         Ω         δ         ∞         φ         ε         ∩
  241.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  242.         //  ≡         ±         ≥         ≤         ⌠         ⌡         ÷        ≈
  243.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm,
  244.         //  °         ∙         ·         √         ⁿ         ²         ■          
  245.             baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm, baseForm
  246.     };
  247.  
  248.     private static final UnicodeClassMapping kCharacterMap
  249.         = new UnicodeClassMapping(kRawMapping, kExceptionChar, CharacterExceptionFlags,
  250.         kCharacterAsciiValues);
  251.  
  252.  
  253.     public WordBreakTable forward()
  254.     {
  255.         return kCharacterForwardTable;
  256.     }
  257.  
  258.     public WordBreakTable backward()
  259.     {
  260.         return kCharacterBackwardTable;
  261.     }
  262.  
  263.     public UnicodeClassMapping map()
  264.     {
  265.         return kCharacterMap;
  266.     }
  267. }
  268.  
  269.